22 May, 2017
suppressWarnings(library(tidyverse))
suppressWarnings(library(magrittr))
suppressWarnings(library(knitr))
suppressWarnings(library(broom))
suppressWarnings(library(arsRtools))
deeptools_file_iasillo <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz'
deeptools_file_meola <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz'
biotype <- 'PROMPT'
anchor <- 'TSS'
This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.
#!/bin/sh
cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws
bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_PROMPT_TSSs.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS PROMPTs "eGFP" "Ars2,Cbp80,Z18" \
"deeptools_out/Chen_PROMPT_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"
cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/
bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_PROMPT_TSSs.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS PROMPTs "EGFP" "RRP40" \
"deeptools_out/Chen_PROMPT_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"
matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_PROMPT_TSS_sensitivity_joined_sensitivity.gz
biotype: PROMPT
anchor: TSS
note for the PROMPTs the ids is simply the TSS information
(bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola))
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')
Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_PROMPT_TSS.RData
save(bin_values, file=bin_values_filename)
everything below is run.
load(bin_values_filename, verbose=TRUE)
## Loading objects:
## bin_values
data('RNAseq_value_heatmap_theme', package='arsRtools')
row_order <- bin_values %>%
group_by(id) %>%
summarize(total_value = sum(value)) %>%
ungroup %>%
arrange(total_value) %$%
id
bin_values %<>%
mutate(id = factor(id, levels=row_order))
bin_values %>%
filter(value > 0,
rel_pos > -2000, rel_pos < 5000) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_value_heatmap_theme +
theme(axis.text.y = element_blank())
bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)
data('RNAseq_lineplot_theme', package='arsRtools')
bin_sensitivities %>%
group_by(rel_pos, siRNA) %>%
do(tidy(t.test(log2(.$value)))) %>%
ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
geom_ribbon(aes(ymin=conf.low, ymax=conf.high, fill=siRNA, color=NULL), alpha=.5) +
geom_line(aes(color=siRNA)) +
ylab('mean log2 fold-change relative control') +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_lineplot_theme +
xlim(-2000,5000)
bin_sensitivities %>%
group_by(rel_pos, siRNA) %>%
do(tidy(t.test(log2(.$value)))) %>%
group_by(siRNA) %>%
mutate(estimate = (estimate-min(estimate))/(max(estimate)-min(estimate))) %>%
ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
geom_line(aes(color=siRNA)) +
ylab('mean log2 fold-change relative control') +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_lineplot_theme +
xlim(-2000,5000)
wilcox_fdr <- bin_sensitivities %>%
group_by(rel_pos, siRNA) %>%
do(tidy(wilcox.test(log2(.$value), alternative='greater'))) %>%
mutate(padj = p.adjust(p.value, method='BH'))
ggplot(wilcox_fdr, aes(x=rel_pos, y=-log10(padj), color=siRNA)) +
geom_line(aes(color=siRNA)) +
geom_hline(yintercept = 1, color='orange', linetype=2) +
facet_grid(siRNA~.) +
ylab('-log10(fdr)') +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_lineplot_theme +
xlim(-2000,5000)
point of below FDR=1 ….
wilcox_fdr %>%
group_by(siRNA) %>%
filter(rel_pos > 0 & padj > .1) %>%
summarize(min_rel_pos = min(rel_pos))
## # A tibble: 2 × 2
## siRNA min_rel_pos
## <fctr> <dbl>
## 1 Ars2 4900
## 2 Z18 1300
data('RNAseq_logFC_heatmap_theme', package='arsRtools')
bin_sensitivities %>%
filter(rel_pos > -2000, rel_pos < 5000) %>%
mutate(id = factor(id, levels=row_order)) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
RNAseq_logFC_heatmap_theme +
theme(axis.text.y = element_blank()) +
xlab(paste0('bp to ', biotype, ' ', anchor))
bin_sensitivities %>%
mutate(value = case_when(.$value > 4 ~ 4,
.$value < .25 ~ .25,
TRUE ~ .$value)) %>%
filter(rel_pos > -2000, rel_pos < 5000) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
RNAseq_logFC_heatmap_theme +
theme(axis.text.y=element_blank()) +
xlab(paste0('bp to ', biotype, ' ', anchor))
deeptools_file_iasillo <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz'
deeptools_file_meola <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz'
biotype <- 'eRNA'
anchor <- 'TSS'
This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.
#!/bin/sh
cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws
bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_eRNA_TSSs_uniq.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS eRNAs "eGFP" "Ars2,Cbp20,Cbp80,NCBP3,Z18" \
"deeptools_out/Chen_eRNA_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"
cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/
bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/Chen_etal/Chen_eRNA_TSSs_uniq.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS eRNAs "EGFP" "RRP40" \
"deeptools_out/Chen_eRNA_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"
matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Chen_eRNA_TSS_sensitivity_joined_sensitivity.gz
biotype: eRNA
anchor: TSS
note for the eRNAs the ids is useless (its actually the distance between forward and reverse TSS from Chen et al) so I replace it with something more informative
(bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola) %>%
mutate(id=paste0(chr, ':', start, '-', end)))
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')
Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_eRNA_TSS.RData
save(bin_values, file=bin_values_filename)
everything below is run.
load(bin_values_filename, verbose=TRUE)
## Loading objects:
## bin_values
row_order <- bin_values %>%
group_by(id) %>%
summarize(total_value = sum(value)) %>%
ungroup %>%
arrange(total_value) %$%
id
bin_values %<>%
mutate(id = factor(id, levels=row_order))
bin_values %>%
filter(value > 0,
rel_pos > -2000, rel_pos < 5000) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_value_heatmap_theme +
theme(axis.text.y = element_blank())
bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)
bin_sensitivities %>%
group_by(rel_pos, siRNA) %>%
do(tidy(t.test(log2(.$value)))) %>%
ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
geom_ribbon(aes(ymin=conf.low, ymax=conf.high, fill=siRNA, color=NULL), alpha=.5) +
geom_line(aes(color=siRNA)) +
ylab('mean log2 fold-change relative control') +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_lineplot_theme +
xlim(-2000,5000)
bin_sensitivities %>%
group_by(rel_pos, siRNA) %>%
do(tidy(t.test(log2(.$value)))) %>%
group_by(siRNA) %>%
mutate(estimate = (estimate-min(estimate))/(max(estimate)-min(estimate))) %>%
ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
geom_line(aes(color=siRNA)) +
ylab('mean log2 fold-change relative control') +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_lineplot_theme +
xlim(-2000,5000)
wilcox_fdr <- bin_sensitivities %>%
group_by(rel_pos, siRNA) %>%
do(tidy(wilcox.test(log2(.$value), alternative='greater'))) %>%
mutate(padj = p.adjust(p.value, method='BH'))
ggplot(wilcox_fdr, aes(x=rel_pos, y=-log10(padj), color=siRNA)) +
geom_line(aes(color=siRNA)) +
geom_hline(yintercept = 1, color='orange', linetype=2) +
facet_grid(siRNA~.) +
ylab('-log10(fdr)') +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_lineplot_theme +
xlim(-2000,5000)
point of below FDR=1 ….
wilcox_fdr %>%
group_by(siRNA) %>%
filter(rel_pos > 0 & padj > .1) %>%
summarize(min_rel_pos = min(rel_pos))
## # A tibble: 1 × 2
## siRNA min_rel_pos
## <fctr> <dbl>
## 1 Z18 1350
bin_sensitivities %>%
filter(rel_pos > -2000, rel_pos < 5000) %>%
mutate(id = factor(id, levels=row_order)) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
RNAseq_logFC_heatmap_theme +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
theme(axis.text.y = element_blank())
bin_sensitivities %>%
mutate(value = case_when(.$value > 4 ~ 4,
.$value < .25 ~ .25,
TRUE ~ .$value)) %>%
filter(rel_pos > -2000, rel_pos < 5000) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
RNAseq_logFC_heatmap_theme +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
theme(axis.text.y = element_blank())
sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] arsRtools_0.1 RColorBrewer_1.1-2 rtracklayer_1.30.4
## [4] GenomicRanges_1.22.4 GenomeInfoDb_1.6.3 IRanges_2.4.8
## [7] S4Vectors_0.8.11 BiocGenerics_0.16.1 broom_0.4.1
## [10] knitr_1.15.1 magrittr_1.5 dplyr_0.5.0
## [13] purrr_0.2.2 readr_1.0.0 tidyr_0.6.0
## [16] tibble_1.2 ggplot2_2.2.0 tidyverse_1.0.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.8 futile.logger_1.4.3
## [3] XVector_0.10.0 plyr_1.8.4
## [5] futile.options_1.0.0 bitops_1.0-6
## [7] zlibbioc_1.16.0 tools_3.2.3
## [9] digest_0.6.10 evaluate_0.10
## [11] gtable_0.2.0 nlme_3.1-128
## [13] lattice_0.20-34 psych_1.6.9
## [15] DBI_0.5-1 yaml_2.1.14
## [17] stringr_1.1.0 Biostrings_2.38.4
## [19] rprojroot_1.1 grid_3.2.3
## [21] Biobase_2.30.0 R6_2.2.0
## [23] BiocParallel_1.4.3 XML_3.98-1.5
## [25] foreign_0.8-67 rmarkdown_1.2
## [27] lambda.r_1.1.9 reshape2_1.4.2
## [29] GenomicAlignments_1.6.3 Rsamtools_1.22.0
## [31] backports_1.0.4 scales_0.4.1
## [33] htmltools_0.3.5 SummarizedExperiment_1.0.2
## [35] assertthat_0.1 mnormt_1.5-5
## [37] colorspace_1.3-2 labeling_0.3
## [39] stringi_1.1.2 RCurl_1.95-4.8
## [41] lazyeval_0.2.0 munsell_0.4.3